3  Tumor: Check patient sexes

3.1 Set up Seurat workspace

# Load libraries
library(data.table)
library(devtools)
library(presto)
library(glmGamPoi)
library(sctransform)
library(Seurat) 
library(tidyverse)
library(miQC)
library(SeuratWrappers)
library(flexmix)
library(SingleCellExperiment)
library(SummarizedExperiment)
library(readxl)
library(fishpond)
library(Matrix)
library(speckle)
library(scater)
library(patchwork)
library(vctrs)
library(alevinQC)
library(harmony)
library(scDblFinder)
library(cellXY)

# Set global options for Seurat v5 objects
options(Seurat.object.assay.version = 'v5')

3.2 Load previously saved clustered object

# Restore the Seurat object written to "Tumor_scRNA_Part2.rds"
merged.18279.tumor.singlets <- readRDS(file = "Tumor_scRNA_Part2.rds")

3.3 Infer sex per cell from chrX and chrY gene counts with cellXY

# Join the layers of the RNA assay so that a single counts matrix is available
merged.18279.tumor.singlets[["RNA"]] <- JoinLayers(
    merged.18279.tumor.singlets[["RNA"]]
)

# Per-cell sex classification with cellXY on the raw RNA counts.
# NOTE(review): in the recorded run this call coerced the sparse counts to a
# dense matrix (~9 GiB allocation warning), so run it with ample memory.
xyPredict <- classifySex(
    x = merged.18279.tumor.singlets[["RNA"]]$counts,
    genome = "Hs",
    qc = FALSE
)
Warning in asMethod(object): sparse->dense coercion: allocating vector of size
9.1 GiB
4cell/s are unable to be classified
              due to an abundance of zeroes on X and Y chromosome genes
# Store the per-cell cellXY call as the metadata column "CellPredictedSex".
# NOTE(review): the prediction vector is passed without cell names, so this
# assumes classifySex() returns one prediction per column of the counts matrix,
# in the same order - confirm.
merged.18279.tumor.singlets <- AddMetaData(
    object = merged.18279.tumor.singlets,
    metadata = xyPredict$prediction,
    col.name = "CellPredictedSex"
)

3.4 Assign known sexes to each sample

# Known sex of each patient. Sample names begin with the patient ID
# (e.g. "P101_Tumor_W00_..."), so every sample inherits its patient's sex.
# Keying on the patient ID avoids depending on the order in which
# unique() happens to return the samples.
patientSex <- c(P101 = "Male", P103 = "Male", P104 = "Female", P108 = "Male")

sampleNames <- as.character(unique(merged.18279.tumor.singlets$Sample))
knownSex <- data.frame(
    Sample = sampleNames,
    # Patient ID = everything before the first underscore of the sample name
    Sex = unname(patientSex[sub("_.*$", "", sampleNames)])
)

# Fail loudly if a sample belongs to a patient with no recorded sex
stopifnot(!anyNA(knownSex$Sex))

knownSex
                       Sample    Sex
1 P101_Tumor_W00_2.5mgIpi_RNA   Male
2 P101_Tumor_W12_2.5mgIpi_RNA   Male
3 P101_Tumor_W20_2.5mgIpi_RNA   Male
4 P103_Tumor_W00_2.5mgIpi_RNA   Male
5 P103_Tumor_W12_2.5mgIpi_RNA   Male
6 P103_Tumor_W20_2.5mgIpi_RNA   Male
7  P104_Tumor_PD_2.5mgIpi_RNA Female
8    P108_Tumor_PD_5mgIpi_RNA   Male

3.5 Summarize cell-wise sex predictions per Sample and compare to known labels

If more than 80% of the per-cell sex predictions for a given Sample agree, we call that Sample as that sex and then compare the call to the known label.

# Preview: per-Sample tally of the cell-level sex calls.
# Cells whose call is neither "Male" nor "Female" are counted only in nCells,
# so nMale + nFemale can be smaller than nCells.
merged.18279.tumor.singlets@meta.data %>%
    as_tibble(rownames = "bc") %>%
    dplyr::select(bc, CellPredictedSex, Sample) %>%
    group_by(Sample) %>%
    summarize(
        nMale = sum(CellPredictedSex == "Male"),
        nFemale = sum(CellPredictedSex == "Female"),
        nCells = n()
    )
# A tibble: 8 × 4
  Sample                      nMale nFemale nCells
  <chr>                       <int>   <int>  <int>
1 P101_Tumor_W00_2.5mgIpi_RNA   617     102    719
2 P101_Tumor_W12_2.5mgIpi_RNA  2301      95   2397
3 P101_Tumor_W20_2.5mgIpi_RNA   990      30   1020
4 P103_Tumor_W00_2.5mgIpi_RNA  3313     259   3572
5 P103_Tumor_W12_2.5mgIpi_RNA  3503     194   3697
6 P103_Tumor_W20_2.5mgIpi_RNA  3204     176   3380
7 P104_Tumor_PD_2.5mgIpi_RNA     64    2289   2353
8 P108_Tumor_PD_5mgIpi_RNA     2600     235   2838
# Call one sex per Sample from the cell-level predictions, then count how many
# Sample-level calls agree / disagree with the known labels in knownSex.
merged.18279.tumor.singlets@meta.data %>%
    as_tibble(rownames = "bc") %>%
    dplyr::select(bc, CellPredictedSex, Sample) %>%
    group_by(Sample) %>%
    summarize(
        nMale = sum(CellPredictedSex == "Male"),
        nFemale = sum(CellPredictedSex == "Female"),
        nCells = n()
    ) %>%
    ungroup() %>%
    # A Sample is called Male/Female only when more than 80% of all its cells
    # (including cells with neither call) carry that call; otherwise "other".
    mutate(
        PredictedSex = case_when(
            nMale / nCells > 0.8 ~ "Male",
            nFemale / nCells > 0.8 ~ "Female",
            TRUE ~ "other"
        )
    ) %>%
    # Samples absent from knownSex are dropped by the inner join
    inner_join(knownSex, by = "Sample") %>%
    summarize(
        nEqual = sum(PredictedSex == Sex),
        nNotEqual = sum(PredictedSex != Sex)
    )
# A tibble: 1 × 2
  nEqual nNotEqual
   <int>     <int>
1      8         0

3.6 Since every sample-level call matches its known label, add Sex as a metadata column

# Look up the known sex of every cell through its Sample label.
# match() returns exactly one value per cell in the original cell order, so a
# Sample missing from knownSex yields NA instead of silently shortening the
# vector (which would misalign the values with the cells).
sampleByCell <- merged.18279.tumor.singlets$Sample
knownSexByCell <- knownSex$Sex[match(sampleByCell, knownSex$Sample)]
# Keep the cell barcodes as names so the values are attached by cell name
names(knownSexByCell) <- names(sampleByCell)

# Every cell must have received a known sex
stopifnot(!anyNA(knownSexByCell))

merged.18279.tumor.singlets <- AddMetaData(
    object = merged.18279.tumor.singlets,
    metadata = knownSexByCell,
    col.name = "Sex"
)

3.7 Plot UMAP of sex

# Plot the cells on the "umap.harmony" reduction, grouped by the "Sex" column
DimPlot(
    object = merged.18279.tumor.singlets,
    reduction = "umap.harmony",
    group.by = "Sex"
)

3.8 Save the updated object

# Write the object, now carrying the CellPredictedSex and Sex metadata columns
saveRDS(
    object = merged.18279.tumor.singlets,
    file = "Tumor_scRNA_Part3.rds"
)

3.9 Get session info

# Record the R version, platform and package versions used for this run
sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] cellXY_0.99.0               scDblFinder_1.14.0         
 [3] harmony_1.2.0               alevinQC_1.16.1            
 [5] vctrs_0.6.5                 patchwork_1.3.0            
 [7] scater_1.30.1               scuttle_1.12.0             
 [9] speckle_1.0.0               Matrix_1.6-4               
[11] fishpond_2.6.2              readxl_1.4.3               
[13] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
[15] Biobase_2.62.0              GenomicRanges_1.54.1       
[17] GenomeInfoDb_1.38.8         IRanges_2.36.0             
[19] S4Vectors_0.40.2            BiocGenerics_0.48.1        
[21] MatrixGenerics_1.14.0       matrixStats_1.4.1          
[23] flexmix_2.3-19              lattice_0.22-6             
[25] SeuratWrappers_0.3.19       miQC_1.8.0                 
[27] lubridate_1.9.3             forcats_1.0.0              
[29] stringr_1.5.1               dplyr_1.1.4                
[31] purrr_1.0.2                 readr_2.1.5                
[33] tidyr_1.3.1                 tibble_3.2.1               
[35] ggplot2_3.5.1               tidyverse_2.0.0            
[37] Seurat_5.1.0                SeuratObject_5.0.2         
[39] sp_2.1-4                    sctransform_0.4.1          
[41] glmGamPoi_1.12.2            presto_1.0.0               
[43] Rcpp_1.0.13-1               devtools_2.4.5             
[45] usethis_3.0.0               data.table_1.16.2          

loaded via a namespace (and not attached):
  [1] fs_1.6.5                  spatstat.sparse_3.1-0    
  [3] bitops_1.0-9              httr_1.4.7               
  [5] RColorBrewer_1.1-3        profvis_0.4.0            
  [7] tools_4.3.2               utf8_1.2.4               
  [9] R6_2.5.1                  DT_0.33                  
 [11] lazyeval_0.2.2            uwot_0.2.2               
 [13] urlchecker_1.0.1          withr_3.0.2              
 [15] GGally_2.2.1              gridExtra_2.3            
 [17] progressr_0.15.1          cli_3.6.3                
 [19] spatstat.explore_3.2-6    fastDummies_1.7.3        
 [21] labeling_0.4.3            spatstat.data_3.1-4      
 [23] ggridges_0.5.6            pbapply_1.7-2            
 [25] Rsamtools_2.18.0          R.utils_2.12.3           
 [27] parallelly_1.39.0         sessioninfo_1.2.2        
 [29] limma_3.58.1              RSQLite_2.3.8            
 [31] BiocIO_1.12.0             generics_0.1.3           
 [33] gtools_3.9.5              ica_1.0-3                
 [35] spatstat.random_3.2-2     ggbeeswarm_0.7.2         
 [37] fansi_1.0.6               abind_1.4-8              
 [39] R.methodsS3_1.8.2         lifecycle_1.0.4          
 [41] yaml_2.3.10               edgeR_4.0.16             
 [43] recipes_1.1.0             SparseArray_1.2.2        
 [45] Rtsne_0.17                blob_1.2.4               
 [47] grid_4.3.2                dqrng_0.4.1              
 [49] promises_1.3.0            crayon_1.5.3             
 [51] shinydashboard_0.7.2      miniUI_0.1.1.1           
 [53] beachmat_2.18.1           cowplot_1.1.3            
 [55] KEGGREST_1.42.0           metapod_1.10.1           
 [57] pillar_1.9.0              knitr_1.45               
 [59] rjson_0.2.23              xgboost_1.7.8.1          
 [61] future.apply_1.11.3       codetools_0.2-20         
 [63] leiden_0.4.3.1            glue_1.8.0               
 [65] remotes_2.5.0             png_0.1-8                
 [67] spam_2.11-0               org.Mm.eg.db_3.18.0      
 [69] cellranger_1.1.0          gtable_0.3.6             
 [71] cachem_1.1.0              gower_1.0.1              
 [73] xfun_0.49                 prodlim_2024.06.25       
 [75] S4Arrays_1.2.0            mime_0.12                
 [77] survival_3.7-0            timeDate_4041.110        
 [79] iterators_1.0.14          hardhat_1.4.0            
 [81] lava_1.8.0                bluster_1.12.0           
 [83] statmod_1.5.0             ellipsis_0.3.2           
 [85] fitdistrplus_1.2-1        ipred_0.9-15             
 [87] ROCR_1.0-11               nlme_3.1-166             
 [89] bit64_4.5.2               RcppAnnoy_0.0.22         
 [91] irlba_2.3.5.1             rpart_4.1.23             
 [93] vipor_0.4.7               KernSmooth_2.23-24       
 [95] DBI_1.2.3                 colorspace_2.1-1         
 [97] nnet_7.3-19               tidyselect_1.2.1         
 [99] bit_4.5.0                 compiler_4.3.2           
[101] BiocNeighbors_1.20.2      DelayedArray_0.28.0      
[103] plotly_4.10.4             rtracklayer_1.62.0       
[105] scales_1.3.0              lmtest_0.9-40            
[107] digest_0.6.37             goftest_1.2-3            
[109] spatstat.utils_3.1-1      rmarkdown_2.29           
[111] XVector_0.42.0            htmltools_0.5.8.1        
[113] pkgconfig_2.0.3           sparseMatrixStats_1.14.0 
[115] fastmap_1.2.0             rlang_1.1.4              
[117] htmlwidgets_1.6.4         shiny_1.9.1              
[119] DelayedMatrixStats_1.24.0 farver_2.1.2             
[121] zoo_1.8-12                jsonlite_1.8.9           
[123] BiocParallel_1.36.0       ModelMetrics_1.2.2.2     
[125] R.oo_1.27.0               BiocSingular_1.18.0      
[127] RCurl_1.98-1.16           magrittr_2.0.3           
[129] modeltools_0.2-23         GenomeInfoDbData_1.2.11  
[131] dotCall64_1.2             munsell_0.5.1            
[133] viridis_0.6.5             reticulate_1.35.0        
[135] pROC_1.18.5               stringi_1.8.4            
[137] zlibbioc_1.48.2           MASS_7.3-60.0.1          
[139] org.Hs.eg.db_3.18.0       plyr_1.8.9               
[141] pkgbuild_1.4.5            ggstats_0.7.0            
[143] parallel_4.3.2            listenv_0.9.1            
[145] ggrepel_0.9.6             deldir_2.0-4             
[147] Biostrings_2.70.3         splines_4.3.2            
[149] tensor_1.5                hms_1.1.3                
[151] locfit_1.5-9.10           igraph_2.1.1             
[153] spatstat.geom_3.2-8       RcppHNSW_0.6.0           
[155] reshape2_1.4.4            ScaledMatrix_1.10.0      
[157] pkgload_1.4.0             XML_3.99-0.17            
[159] evaluate_1.0.1            scran_1.30.2             
[161] BiocManager_1.30.25       foreach_1.5.2            
[163] tzdb_0.4.0                httpuv_1.6.15            
[165] RANN_2.6.2                polyclip_1.10-7          
[167] future_1.34.0             scattermore_1.2          
[169] rsvd_1.0.5                xtable_1.8-4             
[171] restfulr_0.0.15           svMisc_1.2.3             
[173] RSpectra_0.16-2           later_1.3.2              
[175] class_7.3-22              viridisLite_0.4.2        
[177] AnnotationDbi_1.64.1      GenomicAlignments_1.38.2 
[179] memoise_2.0.1             beeswarm_0.4.0           
[181] tximport_1.28.0           cluster_2.1.6            
[183] timechange_0.3.0          globals_0.16.3           
[185] caret_6.0-94